#scale_fill_manual(values=c("white", "gray60")) +
scale_x_continuous(breaks=seq(1, 4, 1),
labels=c("Not enough,\nMajor\ndifficulties",
"Not enough,\nSome\ndifficulties",
"Enough,\nSome\ndifficulties",
"Enough,\nSavings")) +
theme(
panel.background = element_blank(),
legend.position = "none",
panel.border = element_rect(colour = "gray", fill=NA, size=.8)
)
# Partisanship
# Bar chart of the share of respondents at each partisanship code
# (1 = Left, 2 = Center, 3 = Right). Rows with a missing `partisanship`
# value are dropped before plotting.
partisanship_ven <- ggplot(
  df_ven[!is.na(df_ven$partisanship), ],
  aes(x = partisanship)
) +
  # Bar heights are counts rescaled to proportions of the plotted rows.
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  labs(title = "Partisanship", x = "", y = "Proportion") +
  # ylim(0,1) +
  # scale_fill_manual(values=c("white", "gray60")) +
  scale_x_continuous(
    breaks = seq(1, 3, 1),
    labels = c("Left", "Center", "Right")
  ) +
  theme(
    panel.background = element_blank(),
    legend.position = "none",
    panel.border = element_rect(colour = "gray", fill = NA, size = .8)
  )
# Presidential Vote
## something wrong with responses, on scale from 1 to 5
# Benefits
# Density-scaled histogram of `benefits_index`, one bin per integer value,
# with x-axis ticks at 0 through 5.
benefits_ven <- ggplot(df_ven, aes(x = benefits_index)) +
  geom_histogram(aes(y = ..density..), binwidth = 1, colour = "white") +
  # geom_density(alpha=.2) +
  labs(title = "Number of Public Benefits", x = "", y = "Density") +
  # ylim(0, .15) +
  scale_x_continuous(breaks = seq(0, 5, 1)) +
  theme(
    panel.background = element_blank(),
    legend.position = "none",
    panel.border = element_rect(colour = "gray", fill = NA, size = .8)
  )
# Main Reason for Leaving Venezuela
# Bar chart of the share of respondents giving each main reason. Only rows
# with a non-missing `reason_leave_num1` below 11 are plotted, i.e. the ten
# labelled codes 1-10.
reasonleave_ven <- ggplot(
  df_ven[!is.na(df_ven$reason_leave_num1) & df_ven$reason_leave_num1 < 11, ],
  aes(x = reason_leave_num1)
) +
  # Bar heights are counts rescaled to proportions of the plotted rows.
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  labs(title = "Main Reason for Leaving Venezuela", x = "", y = "Proportion") +
  # ylim(0,1) +
  # scale_fill_manual(values=c("white", "gray60")) +
  scale_x_continuous(
    breaks = seq(1, 10, 1),
    labels = c(
      "Economic", "Security", "Unemployment", "Food", "Medicine",
      "Better life", "Join family", "Pregnant", "Help family", "Education"
    )
  ) +
  theme(
    panel.background = element_blank(),
    legend.position = "none",
    panel.border = element_rect(colour = "gray", fill = NA, size = .8)
  )
## Plot
# Arrange the descriptive plots in four stacked rows: three rows of four
# panels each, then a final row of three panels in which the last panel is
# twice as wide as the other two.
(
  city_ven + gender_ven + age_ven + race_ven +
    plot_layout(ncol = 4)
) /
  (
    edu_ven + kids_ven + marriage_ven + religion_ven +
      plot_layout(ncol = 4)
  ) /
  (
    religiosity_ven + wealth_ven + contract_ven + salary_ven +
      plot_layout(ncol = 4)
  ) /
  (
    partisanship_ven + benefits_ven + reasonleave_ven +
      plot_layout(ncol = 3, widths = c(1, 1, 2))
  )
# Chunk 7: lapopbalance_colombians
## Make Colombia Table
### Set vars as either numeric or factor
# Columns compared by t-test are coerced to numeric (going through character
# so factor codes are not mistaken for values); columns compared category by
# category are coerced to factor.
col.merge <- col.merge %>%
  mutate_at(
    c(
      "male", "kids", "pol_ideo",
      "wealth_car", "wealth_washing", "wealth_tv",
      "wealth_computer", "wealth_cell",
      "wealth_internet", "contract"
    ),
    function(x) as.numeric(as.character(x))
  ) %>%
  mutate_at(
    c(
      "age", "marriage", "education", "race",
      "salary", "religion2", "religiosity", "job"
    ),
    as.factor
  )
## Create long dataset - numeric
# One row per (respondent, numeric variable): keep the numeric covariates and
# the `lapop` indicator, stack the covariates into `variable`/`value`, and
# drop any row that contains an NA.
col.merge.num <- col.merge %>%
  dplyr::select(
    male, kids, pol_ideo,
    wealth_car, wealth_washing, wealth_tv,
    wealth_computer, wealth_cell, wealth_internet,
    contract, lapop
  ) %>%
  pivot_longer(
    cols = -lapop,
    names_to = "variable",
    values_to = "value"
  ) %>%
  drop_na()
# Balance table for the numeric covariates (Colombia): for each variable,
# report the LAPOP mean, our sample's mean, their difference, and the p-value
# of a two-sample t-test (t.test() default, i.e. Welch) of value by `lapop`.
bal.tab.num.col <- col.merge.num %>%
  group_by(variable) %>%
  do({
    ttest_out <- t.test(value ~ lapop, data = .)
    # t.test() orders its estimates by the sorted levels of `lapop`, so
    # estimate[1] is the lapop == 0 group (our sample), not LAPOP. Compute the
    # group means explicitly so each column is tied to the right group.
    # `lapop == 1` marks LAPOP rows, as in the categorical balance table.
    mean_lapop <- mean(.$value[.$lapop == 1])
    mean_hpz <- mean(.$value[.$lapop == 0])
    data.frame(
      mean_lapop = mean_lapop,
      mean_hpz = mean_hpz,
      diff_means = mean_lapop - mean_hpz,
      pval_difference = ttest_out$p.value
    )
  }) %>%
  # do() returns a data frame still grouped by `variable`; drop the grouping
  # before relabelling that column.
  ungroup() %>%
  rename(
    `Variable` = variable,
    `LAPOP Mean` = mean_lapop,
    `Sample Mean` = mean_hpz,
    `Diff. in Means` = diff_means,
    `P-Value` = pval_difference
  ) %>%
  # Replace raw column names with display labels.
  mutate(Variable = case_when(
    Variable == "contract" ~ "Formal Contract",
    Variable == "kids" ~ "Number of Children",
    Variable == "male" ~ "Male",
    Variable == "pol_ideo" ~ "Political Ideology (1 left - 10 right)",
    Variable == "wealth_car" ~ "Owns Car",
    Variable == "wealth_cell" ~ "Owns Cellphone",
    Variable == "wealth_computer" ~ "Owns Computer",
    Variable == "wealth_internet" ~ "Access to Internet",
    Variable == "wealth_tv" ~ "Owns TV",
    Variable == "wealth_washing" ~ "Owns Washing Machine"
  )) %>%
  # One alignment letter per column (the table has five columns).
  kable(
    align = "lcccc",
    caption = "Comparing LAPOP and our Sample in Colombia: Numeric Variables",
    label = "tab:lapop_col_num"
  ) %>%
  kable_styling(bootstrap_options = "striped")
## Add Variable Labels
# Replace the numeric codes of each categorical covariate with readable
# labels. Codes not listed in `levels` become NA.
col.merge <- col.merge %>%
  mutate(
    age = factor(
      age,
      levels = 1:5,
      labels = c("18-24", "25-34", "35-44", "45-54", "55 or above")
    ),
    marriage = factor(
      marriage,
      levels = 1:5,
      labels = c(
        "Single", "Married", "Separated/Divorced", "Widow", "Civil Union"
      )
    ),
    education = factor(
      education,
      levels = 0:4,
      labels = c(
        "None", "Primary", "High School",
        "University/Technical", "Masters/PhD"
      )
    ),
    race = factor(
      race,
      levels = 1:6,
      labels = c(
        "White", "Mestizo", "Indigenous", "Black", "Mulatto", "Other"
      )
    ),
    salary = factor(
      salary,
      levels = 1:4,
      labels = c(
        "Not Enough, Major Difficulties",
        "Not Enough, Some Difficulties",
        "Enough Without Great Difficulty",
        "Enough and Can Save"
      )
    ),
    religion2 = factor(
      religion2,
      levels = 1:3,
      labels = c("Catholic", "Protestant", "Other")
    ),
    religiosity = factor(
      religiosity,
      levels = 0:3,
      labels = c(
        "Unimportant", "Somewhat Unimportant",
        "Somewhat Important", "Very Important"
      )
    ),
    job = factor(
      job,
      levels = 1:4,
      labels = c(
        "Not Seeking Work", "Looking for Work", "Student", "Employed"
      )
    )
  )
## Create long dataset - categorical
# One row per (respondent, categorical variable): keep `lapop` and the
# labelled factors, stack the factors into `variable`/`value`, and drop any
# row that contains an NA.
col.merge.cat <- col.merge %>%
  dplyr::select(
    lapop,
    age, marriage, education, race,
    salary, religion2, religiosity, job
  ) %>%
  pivot_longer(
    cols = -lapop,
    names_to = "variable",
    values_to = "value"
  ) %>%
  drop_na()
# Balance table for the categorical covariates (Colombia): for every category
# of every variable, compare the share of LAPOP respondents (lapop == 1) in
# that category with the share of our sample (lapop == 0) using prop.test().
bal.tab.cat.col <- col.merge.cat %>%
  group_by(variable) %>%
  do({
    # Group sizes are identical for every category of this variable, so they
    # are computed once here rather than once per category.
    n_lapop <- sum(.$lapop)
    n_hpz <- nrow(.) - n_lapop
    unique_vals <- unique(.$value)
    pval_out <- map_df(unique_vals, function(x) {
      # Number of respondents in category `x` within each group.
      x_lapop <- sum(.$value[.$lapop == 1] == x)
      x_hpz <- sum(.$value[.$lapop == 0] == x)
      ptest_out <- prop.test(x = c(x_lapop, x_hpz), n = c(n_lapop, n_hpz))
      data.frame(
        value = x,
        mean_lapop = ptest_out$estimate[1],
        mean_hpz = ptest_out$estimate[2],
        diff_means = ptest_out$estimate[1] - ptest_out$estimate[2],
        pval_difference = ptest_out$p.value
      ) %>%
        mutate(value = as.factor(value))
    }) %>%
      arrange(value)
    pval_out
  }) %>%
  # do() returns a data frame still grouped by `variable`; drop the grouping
  # before relabelling that column.
  ungroup() %>%
  rename(
    `Variable` = variable,
    `Category` = value,
    `LAPOP Mean` = mean_lapop,
    `Sample Mean` = mean_hpz,
    `Diff. in Means` = diff_means,
    `P-Value` = pval_difference
  ) %>%
  # Replace raw column names with display labels.
  mutate(Variable = case_when(
    Variable == "age" ~ "Age",
    Variable == "education" ~ "Education",
    Variable == "job" ~ "Employment",
    Variable == "marriage" ~ "Marriage",
    Variable == "race" ~ "Race",
    Variable == "religion2" ~ "Religion",
    Variable == "religiosity" ~ "Religiosity",
    Variable == "salary" ~ "Salary"
  )) %>%
  # One alignment letter per column (the table has six columns).
  kable(
    align = "llcccc",
    caption = "Comparing LAPOP and our Sample in Colombia: Categorical Variables",
    label = "tab:lapop_col_cat"
  ) %>%
  kable_styling(bootstrap_options = "striped")
# show tables
# Print both Colombia balance tables with a fixed ("HOLD_position") float
# placement and a 9pt font.
kable_styling(bal.tab.num.col, latex_options = "HOLD_position", font_size = 9)
kable_styling(bal.tab.cat.col, latex_options = "HOLD_position", font_size = 9)
# Chunk 8: lapopbalance_venezuelans
######     Venezuela Table   #####
### Set vars as either numeric or factor
# Columns compared by t-test are coerced to numeric (going through character
# so factor codes are not mistaken for values); columns compared category by
# category are coerced to factor.
ven.merge <- ven.merge %>%
  mutate_at(
    c(
      "male", "kids", "pol_ideo",
      "wealth_car_ven", "wealth_washing_ven", "wealth_tv_ven",
      "wealth_computer_ven", "wealth_cell_ven",
      "wealth_internet_ven"
    ),
    function(x) as.numeric(as.character(x))
  ) %>%
  mutate_at(
    c(
      "age", "marriage", "education", "race",
      "salary", "religion2", "religiosity", "job"
    ),
    as.factor
  )
## Create long dataset - numeric
# One row per (respondent, numeric variable): keep the numeric covariates and
# the `lapop` indicator, stack the covariates into `variable`/`value`, and
# drop any row that contains an NA.
ven.merge.num <- ven.merge %>%
  dplyr::select(
    male, kids, pol_ideo,
    wealth_car_ven, wealth_washing_ven, wealth_tv_ven,
    wealth_computer_ven, wealth_cell_ven, wealth_internet_ven,
    lapop
  ) %>%
  pivot_longer(
    cols = -lapop,
    names_to = "variable",
    values_to = "value"
  ) %>%
  drop_na()
# Balance table for the numeric covariates (Venezuela): for each variable,
# report the LAPOP mean, our sample's mean, their difference, and the p-value
# of a two-sample t-test (t.test() default, i.e. Welch) of value by `lapop`.
bal.tab.num.ven <- ven.merge.num %>%
  group_by(variable) %>%
  do({
    ttest_out <- t.test(value ~ lapop, data = .)
    # t.test() orders its estimates by the sorted levels of `lapop`, so
    # estimate[1] is the lapop == 0 group (our sample), not LAPOP. Compute the
    # group means explicitly so each column is tied to the right group.
    # `lapop == 1` marks LAPOP rows, as in the categorical balance table.
    mean_lapop <- mean(.$value[.$lapop == 1])
    mean_hpz <- mean(.$value[.$lapop == 0])
    data.frame(
      mean_lapop = mean_lapop,
      mean_hpz = mean_hpz,
      diff_means = mean_lapop - mean_hpz,
      pval_difference = ttest_out$p.value
    )
  }) %>%
  # do() returns a data frame still grouped by `variable`; drop the grouping
  # before relabelling that column.
  ungroup() %>%
  rename(
    `Variable` = variable,
    `LAPOP Mean` = mean_lapop,
    `Sample Mean` = mean_hpz,
    `Diff. in Means` = diff_means,
    `P-Value` = pval_difference
  ) %>%
  # Replace raw column names with display labels.
  mutate(Variable = case_when(
    Variable == "kids" ~ "Number of Children",
    Variable == "male" ~ "Male",
    Variable == "pol_ideo" ~ "Political Ideology (1 left - 10 right)",
    Variable == "wealth_car_ven" ~ "Owns Car",
    Variable == "wealth_cell_ven" ~ "Owns Cellphone",
    Variable == "wealth_computer_ven" ~ "Owns Computer",
    Variable == "wealth_internet_ven" ~ "Access to Internet",
    Variable == "wealth_tv_ven" ~ "Owns TV",
    Variable == "wealth_washing_ven" ~ "Owns Washing Machine"
  )) %>%
  # One alignment letter per column (the table has five columns).
  kable(
    align = "lcccc",
    caption = "Comparing LAPOP and our Sample in Venezuela: Numeric Variables",
    label = "tab:lapop_ven_num"
  ) %>%
  kable_styling(bootstrap_options = "striped")
## Add Variable Labels
# Replace the numeric codes of each categorical covariate with readable
# labels. Codes not listed in `levels` become NA.
ven.merge <- ven.merge %>%
  mutate(
    age = factor(
      age,
      levels = 1:5,
      labels = c("18-24", "25-34", "35-44", "45-54", "55 or above")
    ),
    marriage = factor(
      marriage,
      levels = 1:5,
      labels = c(
        "Single", "Married", "Separated/Divorced", "Widow", "Civil Union"
      )
    ),
    education = factor(
      education,
      levels = 0:4,
      labels = c(
        "None", "Primary", "High School",
        "University/Technical", "Masters/PhD"
      )
    ),
    race = factor(
      race,
      levels = 1:6,
      labels = c(
        "White", "Mestizo", "Indigenous", "Black", "Mulatto", "Other"
      )
    ),
    salary = factor(
      salary,
      levels = 1:4,
      labels = c(
        "Not Enough, Major Difficulties",
        "Not Enough, Some Difficulties",
        "Enough Without Great Difficulty",
        "Enough and Can Save"
      )
    ),
    religion2 = factor(
      religion2,
      levels = 1:3,
      labels = c("Catholic", "Protestant", "Other")
    ),
    religiosity = factor(
      religiosity,
      levels = 0:3,
      labels = c(
        "Unimportant", "Somewhat Unimportant",
        "Somewhat Important", "Very Important"
      )
    ),
    job = factor(
      job,
      levels = 1:4,
      labels = c(
        "Not Seeking Work", "Looking for Work", "Student", "Employed"
      )
    )
  )
## Create long dataset - categorical
# One row per (respondent, categorical variable): keep `lapop` and the
# labelled factors, stack the factors into `variable`/`value`, and drop any
# row that contains an NA.
ven.merge.cat <- ven.merge %>%
  dplyr::select(
    lapop,
    age, marriage, education, race,
    salary, religion2, religiosity, job
  ) %>%
  pivot_longer(
    cols = -lapop,
    names_to = "variable",
    values_to = "value"
  ) %>%
  drop_na()
# Balance table for the categorical covariates (Venezuela): for every category
# of every variable, compare the share of LAPOP respondents (lapop == 1) in
# that category with the share of our sample (lapop == 0) using prop.test().
bal.tab.cat.ven <- ven.merge.cat %>%
  group_by(variable) %>%
  do({
    # Group sizes are identical for every category of this variable, so they
    # are computed once here rather than once per category.
    n_lapop <- sum(.$lapop)
    n_hpz <- nrow(.) - n_lapop
    unique_vals <- unique(.$value)
    pval_out <- map_df(unique_vals, function(x) {
      # Number of respondents in category `x` within each group.
      x_lapop <- sum(.$value[.$lapop == 1] == x)
      x_hpz <- sum(.$value[.$lapop == 0] == x)
      ptest_out <- prop.test(x = c(x_lapop, x_hpz), n = c(n_lapop, n_hpz))
      data.frame(
        value = x,
        mean_lapop = ptest_out$estimate[1],
        mean_hpz = ptest_out$estimate[2],
        diff_means = ptest_out$estimate[1] - ptest_out$estimate[2],
        pval_difference = ptest_out$p.value
      ) %>%
        mutate(value = as.factor(value))
    }) %>%
      arrange(value)
    pval_out
  }) %>%
  # do() returns a data frame still grouped by `variable`; drop the grouping
  # before relabelling that column.
  ungroup() %>%
  rename(
    `Variable` = variable,
    `Category` = value,
    `LAPOP Mean` = mean_lapop,
    `Sample Mean` = mean_hpz,
    `Diff. in Means` = diff_means,
    `P-Value` = pval_difference
  ) %>%
  # Replace raw column names with display labels.
  mutate(Variable = case_when(
    Variable == "age" ~ "Age",
    Variable == "education" ~ "Education",
    Variable == "job" ~ "Employment",
    Variable == "marriage" ~ "Marriage",
    Variable == "race" ~ "Race",
    Variable == "religion2" ~ "Religion",
    Variable == "religiosity" ~ "Religiosity",
    Variable == "salary" ~ "Salary"
  )) %>%
  # One alignment letter per column (the table has six columns).
  kable(
    align = "llcccc",
    caption = "Comparing LAPOP and our Sample in Venezuela: Categorical Variables",
    label = "tab:lapop_ven_cat"
  ) %>%
  kable_styling(bootstrap_options = "striped")
# show tables
# Print both Venezuela balance tables with a fixed ("HOLD_position") float
# placement and a 9pt font.
kable_styling(bal.tab.num.ven, latex_options = "HOLD_position", font_size = 9)
kable_styling(bal.tab.cat.ven, latex_options = "HOLD_position", font_size = 9)
# Chunk 9: ven_left_beliefs
## Privatization
# Bivariate lm_robust() regression of `privatization` on `left_partisanship`
# in the Venezuelan sample.
left_privatization_bi <- lm_robust(
  privatization ~ left_partisanship,
  data = df_ven
)
# left_unemployed_ctrls <- lm_robust(update(pol_left ~ pol_guerilla,
#                                     reformulate(c(".", labels(demo_ctrls_ven)))), data = df_col)

# Coefficient plot: point estimate, its rounded value as a label, and the
# confidence interval for the `left_partisanship` term only, drawn
# horizontally with a dashed reference line at zero.
left_privatization_bi_plot <- left_privatization_bi %>%
  tidy() %>%
  filter(grepl("left_partisanship", term)) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point(size = 2, colour = "#D55E00") +
  geom_label(
    aes(label = round(estimate, 2)),
    nudge_x = 0.2, colour = "#D55E00", size = 3
  ) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0, size = 1, colour = "#D55E00"
  ) +
  scale_colour_manual(values = c("black")) +
  geom_hline(aes(yintercept = 0), lty = "dashed") +
  scale_y_continuous(breaks = c(-.5, 0, .5), limits = c(-.6, .6)) +
  scale_x_discrete(labels = str_wrap("Left ideology (1/0)", width = 10)) +
  labs(
    title = str_wrap("Government should own all industries (1-7)", width = 30),
    x = "",
    y = "Estimate"
  ) +
  coord_flip() +
  yy_theme() # project theme helper defined outside this chunk
# Binary vote indicator: 1 when `pres_vote_col` equals 2, 0 when it is 0, 1
# or 3, and NA for every other value (including missing).
df_ven <- mutate(
  df_ven,
  petro_vote = case_when(
    pres_vote_col == 2 ~ 1,
    pres_vote_col %in% c(0, 1, 3) ~ 0,
    TRUE ~ NA_real_
  )
)
## Petro
# Bivariate lm_robust() regression of `petro_vote` on `left_partisanship` in
# the Venezuelan sample.
left_petro_vote_bi <- lm_robust(
  petro_vote ~ left_partisanship,
  data = df_ven
)
# left_unemployed_ctrls <- lm_robust(update(pol_left ~ pol_guerilla,
#                                     reformulate(c(".", labels(demo_ctrls_ven)))), data = df_col)

# Coefficient plot: point estimate, its rounded value as a label, and the
# confidence interval for the `left_partisanship` term only, drawn
# horizontally with a dashed reference line at zero.
left_petro_vote_bi_plot <- left_petro_vote_bi %>%
  tidy() %>%
  filter(grepl("left_partisanship", term)) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point(size = 2, colour = "#D55E00") +
  geom_label(
    aes(label = round(estimate, 2)),
    nudge_x = 0.2, colour = "#D55E00", size = 3
  ) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0, size = 1, colour = "#D55E00"
  ) +
  scale_colour_manual(values = c("black")) +
  geom_hline(aes(yintercept = 0), lty = "dashed") +
  scale_y_continuous(breaks = c(-.5, 0, .5), limits = c(-.6, .6)) +
  scale_x_discrete(labels = str_wrap("Left ideology (1/0)", width = 10)) +
  labs(
    title = str_wrap("Would vote for Petro (1/0)", width = 30),
    x = "",
    y = "Estimate"
  ) +
  coord_flip() +
  yy_theme() # project theme helper defined outside this chunk
# plot
# Show the two coefficient plots together: privatization first, then the
# Petro vote.
left_privatization_bi_plot + left_petro_vote_bi_plot
# Reverse-code the two fiscal items so that an original 1 becomes 0 and an
# original 0 becomes 1; any other value (including NA) becomes NA.
# Both columns are created in a single, properly closed mutate() call.
df_col <- df_col %>%
  mutate(
    fisc_tax_0 = case_when(
      fisc_tax == 1 ~ 0,
      fisc_tax == 0 ~ 1,
      TRUE ~ NA_real_
    ),
    fisc_service_0 = case_when(
      fisc_service == 1 ~ 0,
      fisc_service == 0 ~ 1,
      TRUE ~ NA_real_
    )
  )
# Sanity check: cross-tabulate the original item against its flipped version.
table(df_col$fisc_tax, df_col$fisc_tax_0)

# Ven support left - taxes go up
# Bivariate model, then the same model with the demographic controls added.
# NOTE(review): `demo_ctrls_col` is defined outside this chunk; presumably a
# formula/terms object whose labels() are the control variable names.
pol_left_fisc_tax_bi <- lm_robust(pol_left ~ fisc_tax_0, data = df_col)
pol_left_fisc_tax_ctrls <- lm_robust(
  update(
    pol_left ~ fisc_tax_0,
    reformulate(c(".", labels(demo_ctrls_col)))
  ),
  data = df_col
)
# Coefficient plot for the with-controls tax model: point estimate, its
# rounded value as a label, and the confidence interval for the `fisc_tax_0`
# term only, drawn horizontally with a dashed reference line at zero.
pol_left_fisc_tax_ctrls %>%
  tidy() %>%
  filter(grepl("fisc_tax_0", term)) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point(size = 2, colour = "#660066") +
  geom_label(
    aes(label = round(estimate, 2)),
    nudge_x = 0.2, colour = "#660066", size = 3
  ) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0, size = 1, colour = "#660066"
  ) +
  scale_colour_manual(values = c("black")) +
  geom_hline(aes(yintercept = 0), lty = "dashed") +
  scale_y_continuous(breaks = c(-.5, 0, .5), limits = c(-.6, .6)) +
  scale_x_discrete(
    labels = str_wrap("With Venezuelans, my taxes will go up (1/0)", width = 13)
  ) +
  labs(
    title = str_wrap(
      "Most Venezuelans in Colombia support the left (1/0)",
      width = 30
    ),
    x = "",
    y = "Estimate"
  ) +
  coord_flip() +
  yy_theme() # project theme helper defined outside this chunk
# Reverse-code the two fiscal items (1 -> 0, 0 -> 1, anything else -> NA).
# This must run before the models below, which use `fisc_service_0`.
df_col <- df_col %>% # flip these two vars
  mutate(
    fisc_tax_0 = case_when(
      fisc_tax == 1 ~ 0,
      fisc_tax == 0 ~ 1,
      TRUE ~ NA_real_
    ),
    fisc_service_0 = case_when(
      fisc_service == 1 ~ 0,
      fisc_service == 0 ~ 1,
      TRUE ~ NA_real_
    )
  )

# Ven support left - access to government services
# Bivariate model, then the same model with the demographic controls added.
# NOTE(review): `demo_ctrls_col` is defined outside this chunk; presumably a
# formula/terms object whose labels() are the control variable names.
pol_left_fisc_service_bi <- lm_robust(pol_left ~ fisc_service_0, data = df_col)
pol_left_fisc_service_ctrls <- lm_robust(
  update(
    pol_left ~ fisc_service_0,
    reformulate(c(".", labels(demo_ctrls_col)))
  ),
  data = df_col
)
# Coefficient plot for the with-controls services model: point estimate, its
# rounded value as a label, and the confidence interval for the
# `fisc_service_0` term only, drawn horizontally with a dashed reference line
# at zero.
pol_left_fisc_service_ctrls %>%
  tidy() %>%
  filter(grepl("fisc_service_0", term)) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point(size = 2, colour = "#660066") +
  geom_label(
    aes(label = round(estimate, 2)),
    nudge_x = 0.2, colour = "#660066", size = 3
  ) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0, size = 1, colour = "#660066"
  ) +
  scale_colour_manual(values = c("black")) +
  geom_hline(aes(yintercept = 0), lty = "dashed") +
  scale_y_continuous(breaks = c(-.5, 0, .5), limits = c(-.6, .6)) +
  scale_x_discrete(
    labels = str_wrap(
      "With Venezuelans, difficult to access govt services (1/0)",
      width = 13
    )
  ) +
  labs(
    title = str_wrap(
      "Most Venezuelans in Colombia support the left (1/0)",
      width = 30
    ),
    x = "",
    y = "Estimate"
  ) +
  coord_flip() +
  yy_theme() # project theme helper defined outside this chunk
